# Read new_master.csv from the project's GitHub repository into `master`.
# The file is comma-separated and its first row supplies the column names.
master <- read.csv(
  file = "https://raw.githubusercontent.com/ndelcamp/IST_718_Project/master/new_master.csv",
  header = TRUE,
  sep = ","
)
#master_AS <- read.transactions("/Users/amroth/Data/new_master.csv", format = "basket",sep="," )

Load Packages

#install.packages("arules")
#install.packages("arulesViz", dependencies = TRUE)
#install.packages("robustbase")
#install.packages("arulesViz")
#install.packages("forecast")
#install.packages('tseries')
#library("arules")
library("arulesViz")
## Loading required package: arules
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
## Loading required package: grid
library(robustbase)
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:Matrix':
## 
##     expand
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library('ggplot2')
#library(forecast)
#library('tseries')
colnames(master)
##  [1] "country"                       "year"                         
##  [3] "sex"                           "age"                          
##  [5] "suicides_no"                   "population"                   
##  [7] "suicides.100k.pop"             "country.year"                 
##  [9] "HDI.for.year"                  "gdp_for_year...."             
## [11] "gdp_per_capita...."            "generation"                   
## [13] "CODE"                          "civilianFirearmsPer100Persons"
## [15] "region"                        "subregion"                    
## [17] "population2017"                "civilianFirearmsCount"        
## [19] "computationMethod"             "registeredFirearms"           
## [21] "unregisteredFirearms"          "Country.or.Area"              
## [23] "InternetUsers"                 "Population"                   
## [25] "Rank"                          "Percentage"                   
## [27] "PercentRank"

My brain works well this way, so I did it the hard way first, before figuring out a better way to do it.

#master.females <- master[master$sex == "female",]
#master.females.5 <- master.females[master.females$age == "5-14 years",]
#master.females.15 <- master.females[master.females$age == "15-24 years",]
#master.females.25 <- master.females[master.females$age == "25-34 years",]
#master.females.35 <- master.females[master.females$age == "35-54 years",]
#master.females.55 <- master.females[master.females$age == "55-74 years",]
#master.females.75 <- master.females[master.females$age == "75+ years",]

#master.males <- master[master$sex == "male",]
#master.males.5 <- master.males[master.males$age == "5-14 years",]
#master.males.15 <- master.males[master.males$age == "15-24 years",]
#master.males.25 <- master.males[master.males$age == "25-34 years",]
#master.males.35 <- master.males[master.males$age == "35-54 years",]
#master.males.55 <- master.males[master.males$age == "55-74 years",]
#master.males.75 <- master.males[master.males$age == "75+ years",]
# Split the full data set by sex; every column is kept in both subsets.
master.males <- master[master[["sex"]] == "male", ]
master.females <- master[master[["sex"]] == "female", ]

# Per-year totals over all rows of `master`: summed population, summed suicide
# count, and the mean of the per-row suicides-per-100k rate.
# The two sums keep the default NA handling (an NA in a year makes that sum
# NA); only the mean drops missing values.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
master.data <- master %>%
  group_by(year) %>%
  summarise(
    population = sum(population),
    suicides = sum(suicides_no),
    mean.100k.pop = mean(suicides.100k.pop, na.rm = TRUE)
  )

# Shared aggregation for the suicide summaries below.
#
# data: data frame containing `suicides.100k.pop` and `suicides_no` columns.
# ...:  unquoted grouping columns, forwarded unchanged to group_by().
#
# Returns one row per group with `mean.100k.pop` (mean of suicides.100k.pop)
# and `mean.total.pop` (mean of suicides_no). Missing values are dropped from
# both means. `TRUE` is used instead of `T`, which is a reassignable variable.
summarise_suicide_means <- function(data, ...) {
  data %>%
    group_by(...) %>%
    summarise(
      mean.100k.pop = mean(suicides.100k.pop, na.rm = TRUE),
      mean.total.pop = mean(suicides_no, na.rm = TRUE)
    )
}

# Both sexes: one row per year / sex / age group.
master.means <- summarise_suicide_means(master, year, sex, age)

# Single-sex subsets: one row per year / age group (sex is constant within
# each subset but is kept as a grouping column so it appears in the result).
master.means.f <- summarise_suicide_means(master.females, year, sex, age)
master.means.m <- summarise_suicide_means(master.males, year, sex, age)

# Single-sex subsets collapsed over age: one row per year.
master.means.f.overall <- summarise_suicide_means(master.females, year, sex)
master.means.m.overall <- summarise_suicide_means(master.males, year, sex)

# One row per country and year: means of the suicide rate per 100k, the
# suicide count, GDP per capita and population, each ignoring missing values.
# `TRUE` is spelled out because `T` is a reassignable variable.
master.country <- master %>%
  group_by(country, year) %>%
  summarise(
    mean.100k.pop = mean(suicides.100k.pop, na.rm = TRUE),
    mean.total.pop = mean(suicides_no, na.rm = TRUE),
    gdp_per_capita.... = mean(gdp_per_capita...., na.rm = TRUE),
    mean.population = mean(population, na.rm = TRUE)
  )


# One row per country and year: means of the registeredFirearms and
# unregisteredFirearms columns, ignoring missing values.
# `TRUE` is spelled out because `T` is a reassignable variable.
master.guns <- master %>%
  group_by(country, year) %>%
  summarise(
    mean.reg = mean(registeredFirearms, na.rm = TRUE),
    mean.unreg = mean(unregisteredFirearms, na.rm = TRUE)
  )

# One row per country: means of the InternetUsers and Population columns,
# ignoring missing values.
# `TRUE` is spelled out because `T` is a reassignable variable.
master.internet <- master %>%
  group_by(country) %>%
  summarise(
    InternetUsers = mean(InternetUsers, na.rm = TRUE),
    Population = mean(Population, na.rm = TRUE)
  )

# One row per country combining the suicide measures with internet usage:
# means of the rate per 100k, suicide count, GDP per capita, InternetUsers and
# Population, each ignoring missing values.
# `TRUE` is spelled out because `T` is a reassignable variable.
master.internet.country <- master %>%
  group_by(country) %>%
  summarise(
    mean.100k.pop = mean(suicides.100k.pop, na.rm = TRUE),
    mean.total.pop = mean(suicides_no, na.rm = TRUE),
    gdp_per_capita.... = mean(gdp_per_capita...., na.rm = TRUE),
    InternetUsers = mean(InternetUsers, na.rm = TRUE),
    Population = mean(Population, na.rm = TRUE)
  )

# One row per country combining the suicide measures with firearm counts:
# means of the rate per 100k, suicide count, GDP per capita, registeredFirearms
# and unregisteredFirearms, each ignoring missing values.
# `TRUE` is spelled out because `T` is a reassignable variable.
master.guns.country <- master %>%
  group_by(country) %>%
  summarise(
    mean.100k.pop = mean(suicides.100k.pop, na.rm = TRUE),
    mean.total.pop = mean(suicides_no, na.rm = TRUE),
    gdp_per_capita.... = mean(gdp_per_capita...., na.rm = TRUE),
    registeredFirearms = mean(registeredFirearms, na.rm = TRUE),
    unregisteredFirearms = mean(unregisteredFirearms, na.rm = TRUE)
  )

# One row per country combining suicide, firearm and internet measures:
# means of the rate per 100k, suicide count, GDP per capita, registered and
# unregistered firearms, InternetUsers and Population, each ignoring missing
# values.
# `TRUE` is spelled out because `T` is a reassignable variable.
master.internet.guns.country <- master %>%
  group_by(country) %>%
  summarise(
    mean.100k.pop = mean(suicides.100k.pop, na.rm = TRUE),
    mean.total.pop = mean(suicides_no, na.rm = TRUE),
    gdp_per_capita.... = mean(gdp_per_capita...., na.rm = TRUE),
    registeredFirearms = mean(registeredFirearms, na.rm = TRUE),
    unregisteredFirearms = mean(unregisteredFirearms, na.rm = TRUE),
    InternetUsers = mean(InternetUsers, na.rm = TRUE),
    Population = mean(Population, na.rm = TRUE)
  )


# Cross-tabulate row counts: age group (rows) against generation (columns).
table(master[["age"]], master[["generation"]])
##              
##               Boomers G.I. Generation Generation X Generation Z Millenials
##   15-24 years       0               0         2114            0       2528
##   25-34 years    1154               0         2682            0        806
##   35-54 years    3030               0          982            0          0
##   5-14 years        0               0          630         1470       2510
##   55-74 years     806             630            0            0          0
##   75+ years         0            2114            0            0          0
##              
##               Silent
##   15-24 years      0
##   25-34 years      0
##   35-54 years    630
##   5-14 years       0
##   55-74 years   3206
##   75+ years     2528
#To Do
#Add Internet Usage
#Look into Generation, Internet Usage, Guns, Region
#Forecasting
# Yearly mean suicides per 100k population, drawn as a line with a marker at
# each year.
ggplot(master.data, aes(x = year, y = mean.100k.pop)) +
  geom_line(colour = "deepskyblue3", size = 1) +
  geom_point(colour = "deepskyblue3", size = 2)

All Data: Male suicide rates are higher than female suicide rates. Suicide rates increase with age. The highest suicide rate per 100k population is among males 75+ years old.

#All Data
ggplotly(ggplot(master.means)+geom_line(aes(x=year, y=mean.100k.pop, group=age, color=age, linetype = sex)) + xlab("Years") + ylab("Suicides per 100k population") + ggtitle("Suicides per 100k population by gender and age range"))
ggplotly(ggplot(master.means)+geom_line(aes(x=year, y=mean.total.pop, group=age, color=sex))+ xlab("Years") + ylab("Suicides by total population") + ggtitle("Suicides by total population by gender and age range"))

Male Suicide: Suicide rates were in a general decline after 1995-1996, but they are now slowly increasing.

# Male-only mean suicides per 100k population, one coloured line per age group.
ggplotly(
  ggplot(master.means.m) +
    geom_line(aes(x = year, y = mean.100k.pop, group = age, color = age)) +
    labs(
      x = "Years",
      y = "Suicides per 100k population",
      title = " Male Suicides per 100k population by age range"
    )
)

Female Suicide: Suicide rates are declining for females overall, but the rates for the younger age groups (15-24 years, 25-34 years) are staying the same.

# Female-only mean suicides per 100k population, one coloured line per age
# group.
ggplotly(
  ggplot(master.means.f) +
    geom_line(aes(x = year, y = mean.100k.pop, group = age, color = age)) +
    labs(
      x = "Years",
      y = "Suicides per 100k population",
      title = " Female Suicides per 100k population by age range"
    )
)

Countries: The first graph shows the mean suicides per 100k population for each country, year by year. The second graph shows that GDP per capita has been increasing.

# Mean suicides per 100k population, one line per country.
# (Title spelling corrected: "Sucides" -> "Suicides".)
ggplotly(
  ggplot(master.country) +
    geom_line(aes(x = year, y = mean.100k.pop, color = country)) +
    xlab("Years") +
    ylab("Suicides per 100k population") +
    ggtitle("Suicides per 100k population per country, year by year")
)

# Mean GDP per capita, one line per country.
# The y axis plots gdp_per_capita...., so it is labelled as GDP rather than
# reusing the suicide-rate label from the plot above.
ggplotly(
  ggplot(master.country) +
    geom_line(aes(x = year, y = gdp_per_capita...., color = country)) +
    xlab("Years") +
    ylab("GDP per capita") +
    ggtitle("GDP per capita per country, year by year")
)

Total Suicides in comparison to Internet Users. Outliers: United States - Mean of Total Suicides - 2.779605e+03, Internet Users - 244,090,854; Brazil - Mean of Total Suicides - 6.091747e+02, Internet Users - 141,206,801; Japan - Mean of Total Suicides - 2.169091e+03, Internet Users - 115,845,120; Russia - Mean of Total Suicides - 3.733772e+03, Internet Users - 109,446,612.

# Per-country scatter of mean suicide count (x) against internet users (y),
# with a fitted linear trend.
ggplotly(
  ggplot(
    master.internet.country,
    aes(x = mean.total.pop, y = InternetUsers)
  ) +
    geom_point(size = 3) +
    geom_smooth(method = lm)
)

Suicides and Unregistered Guns. Outliers: United States - Mean of Total Suicides - 2.779605e+03, Unregistered Firearms - 392,273,257; Russia - Mean of Total Suicides - 3.733772e+03, Unregistered Firearms - 11,020,000; Japan - Mean of Total Suicides - 2.169091e+03, Unregistered Firearms - 201,779.

# Per-country scatter of mean suicide count (x) against unregistered firearms
# (y), with a fitted linear trend.
ggplotly(
  ggplot(
    master.guns.country,
    aes(x = mean.total.pop, y = unregisteredFirearms)
  ) +
    geom_point(size = 3) +
    geom_smooth(method = lm)
)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).

Suicides and Registered Guns

# Per-country scatter of mean suicide count (x) against registered firearms
# (y), points coloured by country, with a fitted linear trend.
# Colour is mapped on the points only: master.guns.country has one row per
# country, so a plot-wide `color = country` mapping would make geom_smooth()
# fit a separate model to each single point instead of one trend across all
# countries.
ggplotly(
  ggplot(
    master.guns.country,
    aes(x = mean.total.pop, y = registeredFirearms)
  ) +
    geom_point(aes(color = country), size = 3) +
    geom_smooth(method = lm)
)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).

Registered Firearms and Internet Users

# Per-country scatter of registered firearms (x) against internet users (y),
# points coloured by country.
ggplotly(
  ggplot(
    master.internet.guns.country,
    aes(x = registeredFirearms, y = InternetUsers, color = country)
  ) +
    geom_point(size = 3)
)

Female Suicides

# Base-graphics plot of the female yearly mean suicide count against its
# position in the series, with a least-squares trend line drawn on top.
# NOTE(review): time() on a plain numeric vector is expected to give the
# observation index rather than the calendar year - confirm this is intended.
plot(master.means.f.overall$mean.total.pop)
abline(
  reg = lm(
    master.means.f.overall$mean.total.pop ~
      time(master.means.f.overall$mean.total.pop)
  )
)

#adf.test(diff(log(master.means.f.overall$mean.total.pop)), alternative="stationary", k=0)

Male Suicides

# Base-graphics plot of the male yearly mean suicide count against its
# position in the series, with a least-squares trend line drawn on top.
# NOTE(review): time() on a plain numeric vector is expected to give the
# observation index rather than the calendar year - confirm this is intended.
plot(master.means.m.overall$mean.total.pop)
abline(
  reg = lm(
    master.means.m.overall$mean.total.pop ~
      time(master.means.m.overall$mean.total.pop)
  )
)